09 Ejercicio regresión armónica dinámica

Eddie Aguilar Ernesto Morales

library("easypackages")
packages("tidyverse","fpp3", "tsibble", "feasts","fable", "patchwork")
Loading required package: tidyverse
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.0
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Loading required package: fpp3

── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──

✔ tsibble     1.1.3     ✔ fable       0.3.2
✔ tsibbledata 0.4.1     ✔ fabletools  0.3.2
✔ feasts      0.3.0     

── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()

Loading required package: patchwork

All packages loaded successfully
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
vic_elec
# A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
   Time                Demand Temperature Date       Holiday
   <dttm>               <dbl>       <dbl> <date>     <lgl>  
 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE   
 7 2012-01-01 03:00:00  3694.        20.1 2012-01-01 TRUE   
 8 2012-01-01 03:30:00  3562.        19.6 2012-01-01 TRUE   
 9 2012-01-01 04:00:00  3433.        19.1 2012-01-01 TRUE   
10 2012-01-01 04:30:00  3359.        19.0 2012-01-01 TRUE   
# … with 52,598 more rows

Usando datos cada hora:

(vic_elec_hour <- index_by(vic_elec, time = ~ floor_date(., unit="hour")) |> 
  summarise(demand = sum(Demand),
         mean_temp = mean(Temperature),
         max_temp = max(Temperature),
         holiday = any(Holiday)) |> 
    mutate(
      day_type = case_when(
        holiday ~ "holiday",
        wday(time) %in% 2:6 ~ "weekday",
        TRUE ~ "weekend"
      )))
# A tsibble: 26,304 x 6 [1h] <Australia/Melbourne>
   time                demand mean_temp max_temp holiday day_type
   <dttm>               <dbl>     <dbl>    <dbl> <lgl>   <chr>   
 1 2012-01-01 00:00:00  8646.      21.2     21.4 TRUE    holiday 
 2 2012-01-01 01:00:00  7927.      20.6     20.7 TRUE    holiday 
 3 2012-01-01 02:00:00  7902.      20.3     20.4 TRUE    holiday 
 4 2012-01-01 03:00:00  7256.      19.8     20.1 TRUE    holiday 
 5 2012-01-01 04:00:00  6793.      19.0     19.1 TRUE    holiday 
 6 2012-01-01 05:00:00  6636.      18.7     18.8 TRUE    holiday 
 7 2012-01-01 06:00:00  6548.      18.7     18.8 TRUE    holiday 
 8 2012-01-01 07:00:00  6865.      19.6     20.1 TRUE    holiday 
 9 2012-01-01 08:00:00  7300.      21.8     22.6 TRUE    holiday 
10 2012-01-01 09:00:00  8002.      24.6     25.2 TRUE    holiday 
# … with 26,294 more rows
p <- autoplot(vic_elec_hour, demand, color = "blue")

ggplotly(p, dynamicTicks = TRUE) |> rangeslider()
vic_elec_hour |> 
  ggplot(aes(x = mean_temp, y = demand, 
             color = holiday)) +
  geom_point(alpha = 0.5)

vic_elec_hour |> 
  ggplot(aes(x = mean_temp, y = demand, 
             color = day_type)) +
  geom_point(alpha = 0.5)

(train <- filter(vic_elec_hour, year(time) %in% 2012:2015))
# A tsibble: 26,304 x 6 [1h] <Australia/Melbourne>
   time                demand mean_temp max_temp holiday day_type
   <dttm>               <dbl>     <dbl>    <dbl> <lgl>   <chr>   
 1 2012-01-01 00:00:00  8646.      21.2     21.4 TRUE    holiday 
 2 2012-01-01 01:00:00  7927.      20.6     20.7 TRUE    holiday 
 3 2012-01-01 02:00:00  7902.      20.3     20.4 TRUE    holiday 
 4 2012-01-01 03:00:00  7256.      19.8     20.1 TRUE    holiday 
 5 2012-01-01 04:00:00  6793.      19.0     19.1 TRUE    holiday 
 6 2012-01-01 05:00:00  6636.      18.7     18.8 TRUE    holiday 
 7 2012-01-01 06:00:00  6548.      18.7     18.8 TRUE    holiday 
 8 2012-01-01 07:00:00  6865.      19.6     20.1 TRUE    holiday 
 9 2012-01-01 08:00:00  7300.      21.8     22.6 TRUE    holiday 
10 2012-01-01 09:00:00  8002.      24.6     25.2 TRUE    holiday 
# … with 26,294 more rows
# maximos:
# year = 4380
# week = 84
# day = 12

fit <- train |>  
  model(
    Fourier = TSLM(demand ~ mean_temp + I(mean_temp^2) + day_type + fourier(period = "year", K = 10) + 
                      fourier(period = "week", K = 8) + 
                      fourier(period = "day", K = 6)))
glance(fit)
# A tibble: 1 × 15
  .model  r_squared adj_r_s…¹ sigma2 stati…² p_value    df log_lik    AIC   AICc
  <chr>       <dbl>     <dbl>  <dbl>   <dbl>   <dbl> <int>   <dbl>  <dbl>  <dbl>
1 Fourier     0.859     0.859 4.30e5   3193.       0    51 -2.08e5 3.41e5 3.41e5
# … with 5 more variables: BIC <dbl>, CV <dbl>, deviance <dbl>,
#   df.residual <int>, rank <int>, and abbreviated variable names
#   ¹​adj_r_squared, ²​statistic
aug <- augment(fit)
aug
# A tsibble: 26,304 x 6 [1h] <Australia/Melbourne>
# Key:       .model [1]
   .model  time                demand .fitted .resid .innov
   <chr>   <dttm>               <dbl>   <dbl>  <dbl>  <dbl>
 1 Fourier 2012-01-01 00:00:00  8646.   5763.  2883.  2883.
 2 Fourier 2012-01-01 01:00:00  7927.   5305.  2622.  2622.
 3 Fourier 2012-01-01 02:00:00  7902.   4600.  3302.  3302.
 4 Fourier 2012-01-01 03:00:00  7256.   3875.  3381.  3381.
 5 Fourier 2012-01-01 04:00:00  6793.   3387.  3406.  3406.
 6 Fourier 2012-01-01 05:00:00  6636.   3355.  3281.  3281.
 7 Fourier 2012-01-01 06:00:00  6548.   3840.  2708.  2708.
 8 Fourier 2012-01-01 07:00:00  6865.   4715.  2150.  2150.
 9 Fourier 2012-01-01 08:00:00  7300.   5622.  1679.  1679.
10 Fourier 2012-01-01 09:00:00  8002.   6341.  1661.  1661.
# … with 26,294 more rows
ggplot() +
  geom_line(vic_elec_hour, mapping = aes(x = time, y = demand)) + 
  geom_line(aug, mapping = aes(x = time, y = .fitted), color = "blue", alpha = 0.8)